{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "predict_mortality.ipynb",
      "provenance": [],
      "authorship_tag": "ABX9TyPc/oNwpJOh5Z13B1u1s559",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/vitaldb/examples/blob/master/predict_mortality.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "KClTefZmMuAk"
      },
      "source": [
        "# 원내 사망 예측 모델\n",
        "본 예제에서는 VitalDB 데이터셋을 이용하여 수술 후 원내 사망을 예측하는 모델을 개발해 본다."
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "1tnmS-S_MrAM",
        "outputId": "d6360124-b3af-4659-a333-c839a1926b72"
      },
      "source": [
        "# Load the VitalDB case table and choose the outcome and input variables\n",
        "import pandas as pd\n",
        "OUTCOME_VAR = 'death_inhosp'\n",
        "INPUT_VARS = ['age', 'preop_hb', 'preop_alb', 'preop_ast', 'preop_cr']\n",
        "df = pd.read_csv('https://api.vitaldb.net/cases')\n",
        "# Keep cases with ASA class below 6 (a missing ASA compares False, so those rows are dropped too),\n",
        "# then renumber the rows 0..n-1 so that row labels and row positions agree after the filter\n",
        "df = df[df['asa'] < 6].reset_index(drop=True)\n",
        "# Positional 70/30 split in file order. .iloc excludes its end point, so the boundary row goes\n",
        "# to the test set only; label slicing with .loc[:ntrain] is end-inclusive and would also put it in train\n",
        "ntrain = int(len(df) * 0.7)\n",
        "y_train = df.iloc[:ntrain][OUTCOME_VAR]\n",
        "x_train = df.iloc[:ntrain][INPUT_VARS]\n",
        "y_test = df.iloc[ntrain:][OUTCOME_VAR]\n",
        "x_test = df.iloc[ntrain:][INPUT_VARS]\n",
        "assert len(y_train) + len(y_test) == len(df)  # every case is in exactly one split\n",
        "print('{}/{} train, {}/{} test'.format(sum(y_train), len(y_train), sum(y_test), len(y_test)))"
      ],
      "execution_count": 1,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "37/4273 train, 18/1970 test\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "KQ_hJ1FvNIGl",
        "outputId": "701ec834-04bb-44fb-9218-6712107af294"
      },
      "source": [
        "# Baseline: use the ASA class itself as the risk score\n",
        "from sklearn.metrics import roc_auc_score, auc, precision_recall_curve\n",
        "# Pick the ASA values through the test outcome's own index, so scores and outcomes are\n",
        "# row-aligned by construction instead of relying on a second, separately written split expression\n",
        "y_pred_asa = df.loc[y_test.index, 'asa']\n",
        "auroc_asa = roc_auc_score(y_test, y_pred_asa)\n",
        "# AUPRC is computed as the trapezoidal area under the precision-recall curve\n",
        "prc_asa, rec_asa, thresholds = precision_recall_curve(y_test, y_pred_asa)\n",
        "auprc_asa = auc(rec_asa, prc_asa)\n",
        "print('ASA auroc: {:.3f}, auprc: {:.3f}'.format(auroc_asa, auprc_asa))"
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "ASA auroc: 0.900, auprc: 0.335\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "fJgNhKGhNJZw",
        "outputId": "ef285e3b-58a7-4a95-ec81-ec531e973bb5"
      },
      "source": [
        "# Logistic regression on imputed inputs\n",
        "from sklearn.impute import SimpleImputer\n",
        "from sklearn.linear_model import LogisticRegression\n",
        "# Learn the fill values from the training inputs only, then apply them to both splits\n",
        "imp = SimpleImputer()\n",
        "imp.fit(x_train)\n",
        "x_train_imputed, x_test_imputed = (imp.transform(part) for part in (x_train, x_test))\n",
        "model = LogisticRegression()\n",
        "model.fit(x_train_imputed, y_train)\n",
        "# Second column of predict_proba = probability of model.classes_[1]\n",
        "y_pred_lr = model.predict_proba(x_test_imputed)[:, 1]\n",
        "auroc_lr = roc_auc_score(y_test, y_pred_lr)\n",
        "# Area under the precision-recall curve, integrated over recall\n",
        "prc_lr, rec_lr, thresholds = precision_recall_curve(y_test, y_pred_lr)\n",
        "auprc_lr = auc(rec_lr, prc_lr)\n",
        "print('LR auroc: {:.3f}, auprc: {:.3f}'.format(auroc_lr, auprc_lr))"
      ],
      "execution_count": 3,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "LR auroc: 0.947, auprc: 0.236\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "cell_type": "code",
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "Oe9U_XvNNJiC",
        "outputId": "3f1924de-adaf-426a-9d63-469877e5d701"
      },
      "source": [
        "# Gradient boosting machine (XGBoost)\n",
        "from xgboost import XGBClassifier\n",
        "model = XGBClassifier(\n",
        "    learning_rate=0.07,\n",
        "    max_depth=4,\n",
        "    n_estimators=50,\n",
        "    subsample=0.8,\n",
        "    colsample_bytree=0.8,\n",
        "    use_label_encoder=False,\n",
        "    eval_metric='logloss',\n",
        ")\n",
        "# The un-imputed inputs are passed straight to the booster here\n",
        "model.fit(x_train, y_train)\n",
        "# Second column of predict_proba = probability of model.classes_[1]\n",
        "y_pred_gbm = model.predict_proba(x_test)[:, 1]\n",
        "auroc_gbm = roc_auc_score(y_test, y_pred_gbm)\n",
        "# Area under the precision-recall curve, integrated over recall\n",
        "prc_gbm, rec_gbm, thresholds = precision_recall_curve(y_test, y_pred_gbm)\n",
        "auprc_gbm = auc(rec_gbm, prc_gbm)\n",
        "print('GBM auroc: {:.3f}, auprc: {:.3f}'.format(auroc_gbm, auprc_gbm))"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "GBM auroc: 0.964, auprc: 0.620\n"
          ],
          "name": "stdout"
        }
      ]
    }
  ]
}